# Introduction


This document describes the data-processing procedure for the paper entitled "The influence of proximity and knowledge base on recombination innovation in R&D Collaboration", submitted to the journal PLOS ONE.

You can contact the corresponding author at nanding@ustb.edu.cn

- This repository contains the datasets analyzed in the paper and the R code used to run the analyses and produce the figures.
- The datasets include: joint patents between organizations in the industry, named 'co-patent-original'; the original nanotechnology patents, named 'nanotechnology_patent_1978-2014' and 'patent'; the restructured nanotechnology patents, named 'nanotechnology_patent_1978-2014_assignee_IPC'; the measured sample variables, named 'paper_variable'; and the variables used in the regression, named 'regression_variable'.
- The data for the years 2015-2018 were added afterwards.

## Data import (nanotechnology patent)

- Import the nanotechnology patent data 'patent' and restructure it ('paper_variable' holds the variables calculated in the previous study)

# Restructure the raw patent table into long format and export it.
#
# Inputs (must already be loaded in the session): `patent` and
# `paper_variable`.
# Outputs: the data frames `paper`, `v` and `vv`, plus four CSV files written
# to the current working directory.
paper <- paper_variable[, 1:24]

# Names given to the first 15 columns of `patent`.
patent_cols <- c(
  "No", "year", "apply_date", "granted_date",
  "IPC_1", "IPC_2", "IPC_3", "IPC_4", "IPC_5", "IPC_6", "IPC_7",
  "IPC", "inventor", "assinee", "inventor_code"
)

# Stack columns 16 to 21 of `patent` under the single column name "assinee_",
# repeating the first 15 columns for each of them.
v <- do.call(rbind, lapply(16:21, function(code_col) {
  part <- patent[c(1:15, code_col)]
  names(part) <- c(patent_cols, "assinee_")
  part
}))
# write.csv() always uses a comma separator; passing `sep` is ignored with a
# warning, so it is not supplied.
write.csv(v, file = "patent_assignee.csv", row.names = FALSE)

# Stack the seven IPC_1..IPC_7 columns (positions 5 to 11 of `v`) under the
# single column name "IPC_", keeping the identifying and assignee columns.
vv <- do.call(rbind, lapply(5:11, function(ipc_col) {
  part <- v[c(1:4, ipc_col, 12:16)]
  names(part) <- c(
    "No", "year", "apply_date", "granted_date", "IPC_",
    "IPC", "inventor", "assinee", "inventor_code", "assinee_"
  )
  part
}))
write.csv(vv, file = "patent_assignee_IPC.csv", row.names = FALSE)

# Assignee-level table: drop the IPC_1..IPC_7 columns, turn empty strings
# into NA and remove every row that contains an NA.
v <- v[c(1:4, 12:16)]
v[v == ""] <- NA
v <- na.omit(v)
write.csv(v, file = "patent_assignee_nospace.csv", row.names = FALSE)

# Same cleaning for the assignee-by-IPC table.
vv[vv == ""] <- NA
vv <- na.omit(vv)
write.csv(vv, file = "patent_assignee_IPC_nospace.csv", row.names = FALSE)

## Construction of the organizations' knowledge network 

- Using the restructured patent data 'vv', construct the knowledge network from the IPC classification codes with a five-year moving window

# Five-year moving windows over `vv`.
# Window k (stored as a1 ... a19) keeps the rows whose `year` lies strictly
# between (1988 + k) and (1994 + k); the first window therefore spans
# 1990-1994 and the last one 2008-2012.
window_starts <- 1990:2008
for (k in seq_along(window_starts)) {
  first_year <- window_starts[k]
  in_window <- vv$year > first_year - 1 & vv$year < first_year + 5
  # which() drops rows whose comparison is NA, as subset() does.
  assign(paste0("a", k), vv[which(in_window), ])
}

library(igraph)
# Build the IPC co-occurrence ("knowledge") network for one window.
#
# window_rows: a data frame whose 1st column is the patent number `No` and
#   whose 5th column is the IPC code `IPC_` (the layout of `vv`).
# Returns an undirected igraph graph in which two different IPC codes are
#   linked when they occur on the same patent; duplicate edges and loops are
#   removed by simplify().
build_ipc_graph <- function(window_rows) {
  codes <- window_rows[c(1, 5)]
  # Self-join on the patent number to enumerate every pair of IPC codes that
  # share a patent, then drop the pairs of a code with itself.
  pairs <- merge(codes, codes, by = "No")
  pairs <- pairs[pairs$IPC_.x != pairs$IPC_.y, c(2, 3)]
  # No `vertices` argument: the vertex set is taken from the edge list.
  # Earlier code passed unique(c(temp5$IPC_x, temp5$IPC_y)), but those
  # columns do not exist (the merged names are IPC_.x / IPC_.y), so that
  # argument was always NULL and the resulting graph is the same.
  simplify(graph_from_data_frame(d = pairs, directed = FALSE))
}

# One knowledge network per five-year window: a1 -> graph1, ..., a19 -> graph19.
knowledge_graphs <- lapply(mget(paste0("a", 1:19)), build_ipc_graph)
names(knowledge_graphs) <- paste0("graph", 1:19)
invisible(list2env(knowledge_graphs, envir = environment()))

- Measurement of the knowledge network indexes using igraph

# Node-level indexes of one knowledge network.
#
# g: an igraph graph whose vertices carry a `name` attribute (the IPC code).
# Returns a data frame with one row per vertex and the columns
#   IPC_ (vertex name), degree (total degree), evcent (unscaled eigenvector
#   centrality) and constraint (Burt's constraint).
#
# The columns are assembled with data.frame() rather than cbind(): cbind()
# would coerce the numeric indexes to character (and, on R < 4.0, to factors
# once converted to a data frame), so a later as.numeric() would return
# factor level codes instead of the measured values.
knowledge_network_measures <- function(g) {
  data.frame(
    IPC_ = as.character(V(g)$name),
    degree = degree(g, mode = "total"),
    evcent = eigen_centrality(g, scale = FALSE)$vector,
    constraint = constraint(g),
    stringsAsFactors = FALSE
  )
}

# One index table per window: graph1 -> network1, ..., graph19 -> network19.
network_tables <- lapply(
  mget(paste0("graph", 1:19)),
  knowledge_network_measures
)
names(network_tables) <- paste0("network", 1:19)
invisible(list2env(network_tables, envir = environment()))


# Knowledge-base measures of every organization in one five-year window.
#
# window_rows: rows of `vv` for the window (columns No, IPC_, assinee_ used).
# network: index table of the window's overall knowledge network, with the
#   columns IPC_, degree, evcent and constraint.
# Returns a data frame with one row per assignee code and the columns
#   results  - assignee code
#   results1 - mean local transitivity of the organization's own IPC network
#   results2 - mean degree of its IPC codes in `network`
#   results3 - mean constraint of its IPC codes in `network`
#   results4 - mean eigenvector centrality of its IPC codes in `network`
#   results5 - number of distinct IPC codes held by the organization
assignee_measures <- function(window_rows, network) {
  assignees <- as.character(unique(window_rows$assinee_))
  stats <- vapply(assignees, function(code) {
    own <- window_rows[which(window_rows$assinee_ == code), ]
    # IPC codes are linked when they appear on the same patent.
    ipc <- own[c("No", "IPC_")]
    pairs <- merge(ipc, ipc, by = "No")
    pairs <- pairs[pairs$IPC_.x != pairs$IPC_.y, c(2, 3)]
    own_graph <- simplify(graph_from_data_frame(d = pairs, directed = FALSE))
    # Position of the organization's IPC codes in the overall network.
    held <- network[which(network$IPC_ %in% own$IPC_), ]
    c(
      mean(transitivity(own_graph, "local"), na.rm = TRUE),
      mean(as.numeric(held$degree)),
      mean(as.numeric(held$constraint)),
      mean(as.numeric(held$evcent)),
      length(unique(own$IPC_))
    )
  }, numeric(5))
  data.frame(
    results = assignees,
    results1 = unname(stats[1, ]),
    results2 = unname(stats[2, ]),
    results3 = unname(stats[3, ]),
    results4 = unname(stats[4, ]),
    results5 = unname(stats[5, ]),
    stringsAsFactors = FALSE
  )
}

# Run the measurement for all 19 windows. Window i (a_i) is matched with the
# network built from the same window (network_i), and labelled with the year
# that follows the window: a1 (1990-1994) -> "1995", ..., a19 -> "2013".
# NOTE(review): the earlier hand-edited template read a3 together with
# network19 and only ever created results_total19, so results_total1 to
# results_total18 did not exist; confirm that pairing a_i with network_i is
# the intended design.
results_by_window <- Map(
  function(window_rows, network, label_year) {
    out <- assignee_measures(window_rows, network)
    out$year <- rep(label_year, nrow(out))
    out
  },
  mget(paste0("a", 1:19)),
  mget(paste0("network", 1:19)),
  as.character(1995:2013)
)
names(results_by_window) <- paste0("results_total", 1:19)
# Keep the per-window tables available as results_total1 ... results_total19.
invisible(list2env(results_by_window, envir = environment()))

results_total <- do.call(rbind, unname(results_by_window))

names(results_total) <- c(
  "assignee_", "cohesion", "degree", "constraint", "evcent", "node", "year"
)

- Combine the network variables with other variables

# Attach the knowledge-network measures of the focal organization
# (key columns `assignee_` and `year`) to the paper variables, keeping every
# row of `paper_variable`.
results_know_network <- merge(
  paper_variable, results_total,
  by = c("assignee_", "year"), all.x = TRUE
)
# write.csv() always writes comma-separated output; a `sep` argument is
# ignored with a warning, so it is not supplied.
write.csv(results_know_network, file = "results_know_network.csv",
          row.names = FALSE)

# Attach the same measures a second time, now matched on the partner
# organization (`assignee_2`). Measure columns present in both inputs get the
# default merge() suffixes .x (focal) and .y (partner).
names(results_total) <- c(
  "assignee_2", "cohesion", "degree", "constraint", "evcent", "node", "year"
)
results_know_network_dyad <- merge(
  results_know_network, results_total,
  by = c("assignee_2", "year"), all.x = TRUE
)
write.csv(results_know_network_dyad, file = "results_know_network_dyad.csv",
          row.names = FALSE)

- Plot of example knowledge networks (including 2012 USEO-C; 2010 JSTA-C; 2012 TOKE-C; 2005 HEWP-C; 1998 IBMC-C)

# Plot the knowledge network of a single organization in one five-year window.
# `patent_assignee_IPC_nospace` must already be loaded in the session.
test1 <- patent_assignee_IPC_nospace[
  which(patent_assignee_IPC_nospace$assinee_ == "IBMC-C"),
]
# Alternative windows used for the other example plots:
# example_rows <- subset(test1, year > 1999 & year < 2005)
# example_rows <- subset(test1, year > 2003 & year < 2009)
# example_rows <- subset(test1, year > 2006 & year < 2012)
# example_rows <- subset(test1, year > 2004 & year < 2010)
# Dedicated names are used so the full-sample objects `a4` and `graph15`
# are not overwritten with single-organization data.
example_rows <- subset(test1, year > 1992 & year < 1998)
example_codes <- example_rows[c(1, 5)]
# IPC codes are linked when they appear on the same patent.
example_pairs <- merge(example_codes, example_codes, by = "No")
example_pairs <- example_pairs[
  example_pairs$IPC_.x != example_pairs$IPC_.y,
  c(2, 3)
]
# The vertex set is taken from the edge list; the merged columns are named
# IPC_.x / IPC_.y, so the former `vertices` argument (built from IPC_x /
# IPC_y) was always NULL.
graph <- graph_from_data_frame(d = example_pairs, directed = FALSE)
g <- simplify(graph)

plot(g,
     main = "a",
     # The layout is computed on the graph before simplify(); it has the same
     # vertices as `g`, and this keeps the figure unchanged.
     layout = layout_as_tree(graph, circular = TRUE),
     vertex.size = 8,
     vertex.shape = "circle",
     vertex.color = rgb(0.1, 0.7, 0.8, 0.5),
     vertex.label.cex = 0.6,
     vertex.label.color = "black",
     vertex.label.dist = 1.5,
     edge.arrow.size = 0,
     edge.width = 0.5,
     edge.color = "black")
